In [3]:
import pandas as pd
import matplotlib as plt
%matplotlib inline
In [4]:
# Load the names dataset from a CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer="usbaby_NationalNames.csv")
In [5]:
# Peek at the first five rows to check that the file parsed as expected.
df.head(n=5)
Out[5]:
In [6]:
# Peek at the last five rows of the frame.
df.tail(n=5)
Out[6]:
In [7]:
# List the column labels as a plain array.
column_index = df.columns
column_index.values
Out[7]:
In [8]:
# Per-gender summary of the Name column.
# For text data, describe() reports count, unique, top and freq, where
# `top` is the value found on the most rows and `freq` is that row count.
# Neither is weighted by the Count column.
names_by_gender = df.groupby('Gender')['Name']
names_by_gender.describe()
Out[8]:
In [9]:
# Number of rows recorded for each year, most frequent year first.
year_column = df['Year']
year_column.value_counts()
Out[9]:
In [10]:
# Show the five rows with the smallest Count (sort_values is ascending by
# default and head() returns five rows by default).
name_counts = df[['Name', 'Count']]
name_counts.sort_values('Count').head()
Out[10]:
In [27]:
# Restrict the data to rows whose Year falls strictly between 1979 and 1990,
# i.e. 1980-1989 for whole-number years.
in_1980s = (df['Year'] > 1979) & (df['Year'] < 1990)
recent = df[in_1980s]

# Rank names by their summed Count over the decade.
# recent['Name'].describe() is not suitable here: its `top` entry is only the
# name that occupies the most rows, and it ignores the Count column, so it
# cannot tell which name was the most common.
# NOTE(review): assumes Count is the number of babies per row - confirm.
recent.groupby('Name')['Count'].sum().sort_values(ascending=False).head()
Out[27]:
In [35]:
# Keep only the rows recorded for 2014 and preview the first five of them.
is_2014 = df['Year'] == 2014
df_2014 = df[is_2014]
df_2014.head(n=5)
Out[35]:
In [57]:
# Bar chart of how many 2014 rows belong to each Gender value.
# This counts rows, not the Count column.
gender_rows_2014 = df_2014['Gender'].value_counts()
gender_rows_2014.plot(kind='bar')
Out[57]:
In [76]:
# Boolean mask marking the rows whose Name begins with a capital "T".
name_column = df['Name']
starts_with_t = name_column.str.startswith("T")

# Preview the first five matching rows.
df[starts_with_t].head(n=5)
Out[76]:
In [77]:
# Count how many rows have a Name starting with "T" (True) versus not (False).
# The mask is recomputed here so this cell does not depend on an earlier one.
name_starts_with_t = df['Name'].str.startswith("T")
name_starts_with_t.value_counts()
Out[77]:
In [63]:
# Number of rows per Gender value across the whole dataset, largest first.
gender_column = df['Gender']
gender_column.value_counts()
Out[63]:
In [ ]:
# Number of names to show; one bar per name keeps the chart legible.
TOP_N = 20

plt.style.use("fivethirtyeight")
df_2014 = df[df['Year'] == 2014]

# Plotting df_2014 directly draws one horizontal bar for every row of the
# subset, which is unreadable (and slow) once the subset has more than a few
# dozen rows. Instead, total Count per name and keep the TOP_N largest totals.
# NOTE(review): a name may occupy more than one 2014 row (e.g. one per
# Gender); summing merges those rows into a single bar - confirm this is wanted.
top_names_2014 = (
    df_2014.groupby('Name')['Count']
    .sum()
    .nlargest(TOP_N)
    .sort_values()  # ascending, so the largest total is drawn at the top
)

ax = top_names_2014.plot(kind='barh', legend=False)
ax.set(
    title=f"Top {TOP_N} names in 2014",
    xlabel="Count",
    ylabel="Name",
);
In [ ]: